{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Practice problem: Dimension-Independent Finite Difference Kernel\n",
    "\n",
    "A particular type of problem that is often tricky to address with a single codebase is handling varying dimensionality, i.e. for example handling 1D, 2D and 3D cases in a single code. In this problem, we will practice that for a simple finite difference code that applies a second-order centered finite difference operator:\n",
    "\n",
    "$$\n",
    "f''(x) \\approx  \\frac{f(x+h) - 2 f(x) + f(x-h)}{h^{2}}\n",
    "$$\n",
    "along each axis, summing the results. This implements an $n$-dimensional Laplacian ($\\triangle$) or div-grad operator.\n",
    "\n",
    "To keep things simple, we will not worry about boundary conditions. Also, to keep things simple, we will assume that we have exactly 20 data points in each direction, and we will assume the grid spacing $h$ is 1."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from mako.template import Template\n",
    "import pyopencl as cl\n",
    "import pyopencl.array\n",
    "import pyopencl.clrandom\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tpl = Template(\"\"\"\n",
    "    kernel void fdiff(global float *out, global float * ary)\n",
    "    {\n",
    "        out[...] = ...\n",
    "    }\n",
    "    \"\"\", strict_undefined=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#clear\n",
    "# Solution\n",
    "\n",
    "\n",
    "\n",
    "tpl = Template(\"\"\"\n",
    "\n",
    "    kernel void fdiff(global float *out, global float * ary)\n",
    "\n",
    "    {\n",
    "\n",
    "        int ibase = \n",
    "\n",
    "            <% stride = 1 %>\n",
    "\n",
    "            %for iax in range(dim):  \n",
    "\n",
    "                + (get_global_id(${iax}) + 1)*${stride}\n",
    "\n",
    "                <% stride *= 20 %>\n",
    "\n",
    "            %endfor\n",
    "\n",
    "            ; \n",
    "\n",
    "        \n",
    "\n",
    "        out[ibase] = -2*${dim}*ary[ibase]\n",
    "\n",
    "            <% stride = 1 %>\n",
    "\n",
    "            %for iax in range(dim):  \n",
    "\n",
    "                + ary[ibase - ${stride}] + ary[ibase + ${stride}]\n",
    "\n",
    "                <% stride *= 20 %>\n",
    "\n",
    "            %endfor\n",
    "\n",
    "            ;\n",
    "\n",
    "    }\n",
    "\n",
    "    \"\"\", strict_undefined=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "    kernel void fdiff(global float *out, global float * ary)\n",
      "    {\n",
      "        int ibase = \n",
      "            \n",
      "                + (get_global_id(0) + 1)*1\n",
      "                \n",
      "                + (get_global_id(1) + 1)*20\n",
      "                \n",
      "                + (get_global_id(2) + 1)*400\n",
      "                \n",
      "            ; \n",
      "        \n",
      "        out[ibase] = -2*3*ary[ibase]\n",
      "            \n",
      "                + ary[ibase - 1] + ary[ibase + 1]\n",
      "                \n",
      "                + ary[ibase - 20] + ary[ibase + 20]\n",
      "                \n",
      "                + ary[ibase - 400] + ary[ibase + 400]\n",
      "                \n",
      "            ;\n",
      "    }\n",
      "    \n"
     ]
    }
   ],
   "source": [
    "dim = 3\n",
    "code = tpl.render(dim=dim)\n",
    "print(code)\n",
    "\n",
    "cl_context = cl.create_some_context()\n",
    "queue = cl.CommandQueue(cl_context)\n",
    "\n",
    "prg = cl.Program(cl_context, code).build()\n",
    "knl = prg.fdiff\n",
    "\n",
    "a = cl.clrandom.rand(queue, (20,)*dim, dtype=np.float32)\n",
    "out = cl.array.empty_like(a)\n",
    "\n",
    "knl(queue, (18,)*dim, None, out.data, a.data)\n",
    "queue.finish()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 0
}